import statsmodels.api as sm
import pandas as pd
import statsmodels.formula.api as smf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.linear_model import LinearRegression
import numpy as np
df=pd.read_csv('/Users/maha/Downloads/wages_by_education.csv')
df.head()
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
| 3 | 2019 | 16.11 | 21.64 | 24.00 | 39.61 | 51.57 | 17.55 | 23.99 | 26.99 | 45.74 | ... | 13.30 | 17.18 | 19.17 | 30.31 | 40.42 | 14.50 | 17.71 | 19.69 | 30.18 | 42.30 |
| 4 | 2018 | 15.94 | 21.50 | 23.70 | 38.87 | 51.03 | 17.70 | 23.72 | 26.61 | 44.97 | ... | 12.48 | 16.81 | 18.57 | 30.85 | 39.64 | 13.47 | 17.28 | 19.29 | 29.47 | 39.35 |
5 rows × 61 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50 entries, 0 to 49 Data columns (total 61 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 50 non-null int64 1 less_than_hs 50 non-null float64 2 high_school 50 non-null float64 3 some_college 50 non-null float64 4 bachelors_degree 50 non-null float64 5 advanced_degree 50 non-null float64 6 men_less_than_hs 50 non-null float64 7 men_high_school 50 non-null float64 8 men_some_college 50 non-null float64 9 men_bachelors_degree 50 non-null float64 10 men_advanced_degree 50 non-null float64 11 women_less_than_hs 50 non-null float64 12 women_high_school 50 non-null float64 13 women_some_college 50 non-null float64 14 women_bachelors_degree 50 non-null float64 15 women_advanced_degree 50 non-null float64 16 white_less_than_hs 50 non-null float64 17 white_high_school 50 non-null float64 18 white_some_college 50 non-null float64 19 white_bachelors_degree 50 non-null float64 20 white_advanced_degree 50 non-null float64 21 black_less_than_hs 50 non-null float64 22 black_high_school 50 non-null float64 23 black_some_college 50 non-null float64 24 black_bachelors_degree 50 non-null float64 25 black_advanced_degree 50 non-null float64 26 hispanic_less_than_hs 50 non-null float64 27 hispanic_high_school 50 non-null float64 28 hispanic_some_college 50 non-null float64 29 hispanic_bachelors_degree 50 non-null float64 30 hispanic_advanced_degree 50 non-null float64 31 white_men_less_than_hs 50 non-null float64 32 white_men_high_school 50 non-null float64 33 white_men_some_college 50 non-null float64 34 white_men_bachelors_degree 50 non-null float64 35 white_men_advanced_degree 50 non-null float64 36 black_men_less_than_hs 50 non-null float64 37 black_men_high_school 50 non-null float64 38 black_men_some_college 50 non-null float64 39 black_men_bachelors_degree 50 non-null float64 40 black_men_advanced_degree 50 non-null float64 41 hispanic_men_less_than_hs 50 non-null float64 42 hispanic_men_high_school 50 non-null float64 43 hispanic_men_some_college 50 non-null float64 44 hispanic_men_bachelors_degree 50 non-null float64 45 hispanic_men_advanced_degree 50 non-null float64 46 white_women_less_than_hs 50 non-null float64 47 white_women_high_school 50 non-null float64 48 white_women_some_college 50 non-null float64 49 white_women_bachelors_degree 50 non-null float64 50 white_women_advanced_degree 50 non-null float64 51 black_women_less_than_hs 50 non-null float64 52 black_women_high_school 50 non-null float64 53 black_women_some_college 50 non-null float64 54 black_women_bachelors_degree 50 non-null float64 55 black_women_advanced_degree 50 non-null float64 56 hispanic_women_less_than_hs 50 non-null float64 57 hispanic_women_high_school 50 non-null float64 58 hispanic_women_some_college 50 non-null float64 59 hispanic_women_bachelors_degree 50 non-null float64 60 hispanic_women_advanced_degree 50 non-null float64 dtypes: float64(60), int64(1) memory usage: 24.0 KB
fig = px.line(df, x='year', y=['less_than_hs', 'high_school', 'some_college', 'bachelors_degree', 'advanced_degree'],
labels={'value': 'Wage', 'variable': 'Education Level'},
title='Wages by Education Level in the USA (1973-2022)')
fig.update_layout(
xaxis_title='Year',
yaxis_title='Wage',
legend_title='Education Level',
hovermode='x'
)
fig.show()
selected_year = [2020,2021,2022]
filtered_df = df[df['year'].isin(selected_year)]
filtered_df
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
3 rows × 61 columns
filtered_df.plot(x='year', y=['less_than_hs','high_school','some_college','bachelors_degree','advanced_degree'], kind='bar', legend=True)
plt.title('Bar Chart Example')
plt.xlabel('Years')
plt.ylabel('Values')
plt.show()
from pandasql import sqldf
df = pd.read_csv('/Users/maha/Downloads/wages_by_education.csv')
pysqldf = sqldf("SELECT * FROM df", globals())
sqldf("SELECT * FROM df")
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
| 3 | 2019 | 16.11 | 21.64 | 24.00 | 39.61 | 51.57 | 17.55 | 23.99 | 26.99 | 45.74 | ... | 13.30 | 17.18 | 19.17 | 30.31 | 40.42 | 14.50 | 17.71 | 19.69 | 30.18 | 42.30 |
| 4 | 2018 | 15.94 | 21.50 | 23.70 | 38.87 | 51.03 | 17.70 | 23.72 | 26.61 | 44.97 | ... | 12.48 | 16.81 | 18.57 | 30.85 | 39.64 | 13.47 | 17.28 | 19.29 | 29.47 | 39.35 |
| 5 | 2017 | 15.92 | 21.26 | 23.31 | 38.65 | 49.40 | 17.63 | 23.47 | 25.91 | 44.50 | ... | 12.99 | 16.33 | 18.76 | 29.19 | 38.26 | 13.36 | 17.02 | 19.60 | 29.69 | 38.43 |
| 6 | 2016 | 15.45 | 21.00 | 23.35 | 38.73 | 49.64 | 16.95 | 23.01 | 25.98 | 44.98 | ... | 12.42 | 16.71 | 18.88 | 29.56 | 38.55 | 13.40 | 17.11 | 19.26 | 29.41 | 37.77 |
| 7 | 2015 | 15.25 | 20.81 | 23.17 | 37.96 | 48.42 | 16.63 | 22.93 | 25.99 | 43.47 | ... | 12.79 | 16.21 | 18.50 | 30.42 | 37.37 | 13.42 | 16.99 | 18.66 | 29.71 | 36.37 |
| 8 | 2014 | 14.66 | 20.34 | 22.48 | 36.47 | 47.27 | 15.95 | 22.40 | 24.96 | 41.14 | ... | 12.47 | 16.09 | 18.39 | 30.20 | 37.88 | 12.61 | 16.55 | 18.22 | 28.53 | 37.16 |
| 9 | 2013 | 14.62 | 20.31 | 22.50 | 37.00 | 48.54 | 15.86 | 22.28 | 25.07 | 42.34 | ... | 12.41 | 16.60 | 18.32 | 29.57 | 38.40 | 12.80 | 16.23 | 18.70 | 28.20 | 36.92 |
| 10 | 2012 | 14.80 | 20.50 | 22.56 | 36.76 | 48.47 | 16.15 | 22.65 | 25.05 | 42.25 | ... | 12.20 | 16.52 | 18.66 | 29.43 | 37.46 | 12.61 | 16.41 | 18.78 | 27.58 | 38.91 |
| 11 | 2011 | 14.96 | 20.67 | 22.90 | 36.44 | 47.56 | 16.16 | 22.81 | 25.34 | 41.41 | ... | 13.40 | 16.56 | 18.80 | 30.49 | 38.01 | 13.06 | 16.56 | 19.44 | 28.19 | 39.59 |
| 12 | 2010 | 15.04 | 21.06 | 23.56 | 37.42 | 48.80 | 16.32 | 23.28 | 26.24 | 42.71 | ... | 13.63 | 17.33 | 19.61 | 30.53 | 37.74 | 12.89 | 16.85 | 19.65 | 28.39 | 41.27 |
| 13 | 2009 | 15.41 | 21.42 | 23.82 | 37.30 | 49.12 | 16.91 | 23.74 | 26.75 | 43.04 | ... | 13.23 | 17.26 | 19.88 | 29.59 | 39.37 | 13.24 | 17.43 | 19.63 | 29.44 | 41.34 |
| 14 | 2008 | 15.08 | 20.93 | 23.52 | 37.04 | 47.55 | 16.56 | 23.30 | 26.32 | 42.34 | ... | 12.78 | 16.69 | 19.55 | 30.50 | 38.24 | 13.02 | 16.70 | 19.86 | 28.68 | 37.34 |
| 15 | 2007 | 15.19 | 21.04 | 23.84 | 37.23 | 47.31 | 16.60 | 23.39 | 26.66 | 42.55 | ... | 13.08 | 17.22 | 20.07 | 29.77 | 38.75 | 12.76 | 16.91 | 19.94 | 29.81 | 40.13 |
| 16 | 2006 | 14.90 | 21.17 | 23.69 | 36.99 | 47.23 | 16.31 | 23.55 | 26.39 | 41.97 | ... | 13.25 | 17.02 | 19.94 | 30.53 | 38.06 | 12.68 | 16.90 | 19.62 | 28.80 | 38.93 |
| 17 | 2005 | 14.96 | 21.07 | 23.76 | 36.89 | 47.09 | 16.46 | 23.34 | 26.49 | 41.96 | ... | 12.90 | 17.07 | 19.69 | 29.25 | 38.33 | 12.64 | 16.96 | 19.73 | 28.61 | 38.19 |
| 18 | 2004 | 15.13 | 21.33 | 24.11 | 36.78 | 47.48 | 16.66 | 23.74 | 27.03 | 41.74 | ... | 12.81 | 17.51 | 20.01 | 30.28 | 38.36 | 12.69 | 17.24 | 20.08 | 29.68 | 37.50 |
| 19 | 2003 | 15.27 | 21.50 | 24.19 | 37.14 | 46.79 | 16.79 | 23.89 | 27.04 | 42.18 | ... | 13.13 | 17.59 | 20.45 | 31.02 | 37.78 | 13.04 | 17.38 | 20.13 | 28.66 | 35.91 |
| 20 | 2002 | 15.18 | 21.39 | 24.25 | 37.26 | 47.36 | 16.70 | 23.92 | 27.10 | 42.36 | ... | 12.84 | 17.41 | 20.15 | 30.11 | 38.80 | 12.80 | 16.66 | 20.03 | 28.39 | 37.85 |
| 21 | 2001 | 14.88 | 21.10 | 24.05 | 37.07 | 46.23 | 16.23 | 23.67 | 27.12 | 42.14 | ... | 13.04 | 16.97 | 19.70 | 30.11 | 37.22 | 12.83 | 16.88 | 19.71 | 28.71 | 36.79 |
| 22 | 2000 | 14.67 | 20.80 | 23.68 | 36.44 | 45.99 | 16.13 | 23.48 | 26.78 | 41.41 | ... | 12.95 | 16.81 | 19.59 | 30.18 | 36.87 | 12.29 | 16.53 | 19.38 | 28.28 | 35.66 |
| 23 | 1999 | 14.61 | 20.67 | 23.36 | 35.76 | 46.02 | 16.16 | 23.31 | 26.34 | 40.71 | ... | 12.72 | 16.42 | 19.21 | 29.54 | 37.04 | 12.16 | 16.38 | 19.03 | 28.09 | 35.95 |
| 24 | 1998 | 14.57 | 20.39 | 23.02 | 34.97 | 44.49 | 16.04 | 22.99 | 25.86 | 39.63 | ... | 12.57 | 16.25 | 19.29 | 28.68 | 36.92 | 12.10 | 16.27 | 19.21 | 27.55 | 34.36 |
| 25 | 1997 | 14.07 | 19.95 | 22.54 | 33.44 | 43.68 | 15.44 | 22.58 | 25.33 | 37.84 | ... | 12.25 | 15.83 | 18.78 | 26.60 | 33.90 | 11.83 | 15.66 | 18.37 | 27.23 | 34.91 |
| 26 | 1996 | 13.95 | 19.62 | 22.11 | 32.50 | 43.35 | 15.45 | 22.11 | 24.79 | 36.80 | ... | 11.77 | 15.70 | 17.92 | 26.03 | 35.73 | 11.77 | 15.53 | 18.28 | 26.75 | 38.63 |
| 27 | 1995 | 13.95 | 19.68 | 22.04 | 32.86 | 43.27 | 15.39 | 22.20 | 24.78 | 37.07 | ... | 12.07 | 15.53 | 18.59 | 27.99 | 37.90 | 11.35 | 15.91 | 18.17 | 26.66 | 36.22 |
| 28 | 1994 | 14.33 | 19.88 | 22.26 | 32.72 | 43.46 | 15.72 | 22.39 | 24.89 | 36.89 | ... | 12.45 | 15.79 | 18.90 | 27.01 | 37.24 | 12.07 | 16.47 | 18.86 | 26.70 | 36.96 |
| 29 | 1993 | 14.60 | 19.65 | 22.27 | 32.16 | 41.45 | 16.09 | 22.18 | 24.84 | 36.37 | ... | 12.73 | 15.84 | 18.70 | 27.45 | 35.67 | 11.94 | 16.25 | 18.79 | 25.57 | 36.49 |
| 30 | 1992 | 14.87 | 19.65 | 22.26 | 32.16 | 40.94 | 16.42 | 22.32 | 24.95 | 36.38 | ... | 13.21 | 15.73 | 18.98 | 27.01 | 34.10 | 12.18 | 15.79 | 18.89 | 26.66 | 34.62 |
| 31 | 1991 | 14.99 | 19.77 | 22.49 | 31.87 | 40.71 | 16.66 | 22.49 | 25.46 | 36.36 | ... | 12.61 | 15.84 | 18.91 | 26.81 | 32.58 | 12.19 | 16.11 | 18.51 | 25.40 | 33.75 |
| 32 | 1990 | 15.05 | 19.74 | 22.58 | 32.21 | 39.95 | 16.90 | 22.69 | 25.75 | 36.77 | ... | 12.20 | 15.63 | 18.77 | 25.51 | 32.97 | 12.07 | 15.93 | 18.33 | 26.63 | 32.33 |
| 33 | 1989 | 15.46 | 20.07 | 22.61 | 31.89 | 39.80 | 17.63 | 23.31 | 25.78 | 36.60 | ... | 12.14 | 15.69 | 19.00 | 25.97 | 34.39 | 12.13 | 15.95 | 19.17 | 25.12 | 32.88 |
| 34 | 1988 | 15.79 | 20.66 | 23.03 | 33.41 | 41.12 | 18.02 | 24.18 | 26.71 | 39.34 | ... | 12.32 | 16.17 | 18.67 | 26.04 | 35.80 | 12.27 | 15.77 | 19.43 | 24.59 | 31.65 |
| 35 | 1987 | 15.76 | 20.60 | 23.17 | 33.19 | 40.55 | 18.05 | 24.17 | 27.02 | 39.07 | ... | 12.60 | 16.03 | 18.76 | 25.21 | 33.45 | 12.31 | 15.97 | 18.92 | 25.01 | 32.33 |
| 36 | 1986 | 16.10 | 20.61 | 23.12 | 32.67 | 39.88 | 18.44 | 24.24 | 27.04 | 38.52 | ... | 12.78 | 16.09 | 18.48 | 24.60 | 33.04 | 12.18 | 16.35 | 18.68 | 24.92 | 28.90 |
| 37 | 1985 | 16.08 | 20.40 | 22.60 | 31.50 | 38.31 | 18.46 | 24.14 | 26.25 | 37.00 | ... | 12.45 | 15.87 | 18.30 | 25.15 | 31.28 | 12.40 | 15.89 | 18.38 | 24.12 | 29.40 |
| 38 | 1984 | 16.21 | 20.30 | 22.37 | 31.07 | 37.45 | 18.66 | 24.06 | 25.97 | 36.63 | ... | 12.39 | 15.65 | 18.04 | 23.66 | 31.81 | 12.39 | 16.25 | 17.79 | 22.23 | 30.82 |
| 39 | 1983 | 16.43 | 20.43 | 22.28 | 30.73 | 36.78 | 18.89 | 24.33 | 26.04 | 36.21 | ... | 12.92 | 15.94 | 17.71 | 23.73 | 30.64 | 12.35 | 16.02 | 18.94 | 23.17 | 26.31 |
| 40 | 1982 | 16.73 | 20.65 | 22.43 | 30.55 | 36.08 | 19.34 | 24.64 | 26.22 | 35.80 | ... | 12.81 | 15.95 | 17.88 | 23.54 | 30.65 | 12.67 | 16.48 | 17.68 | 23.98 | 30.21 |
| 41 | 1981 | 17.07 | 20.68 | 22.47 | 30.17 | 35.32 | 19.64 | 24.84 | 26.24 | 35.41 | ... | 12.88 | 15.86 | 18.14 | 23.82 | 31.05 | 13.02 | 16.13 | 17.49 | 21.29 | 31.21 |
| 42 | 1980 | 17.42 | 20.90 | 22.73 | 30.04 | 35.53 | 20.08 | 25.18 | 26.63 | 35.16 | ... | 12.98 | 16.09 | 18.28 | 23.99 | 31.58 | 12.87 | 16.16 | 17.86 | 22.60 | 28.23 |
| 43 | 1979 | 17.95 | 21.62 | 23.20 | 30.57 | 36.19 | 20.74 | 26.04 | 27.18 | 35.72 | ... | 13.33 | 16.30 | 18.54 | 23.84 | 29.34 | 13.20 | 16.27 | 18.02 | 21.94 | 30.87 |
| 44 | 1978 | 17.29 | 21.52 | 23.25 | 30.90 | 37.56 | 20.10 | 25.97 | 27.14 | 35.92 | ... | 12.67 | 17.00 | 18.24 | 25.47 | 34.08 | 13.08 | 15.72 | 17.56 | 21.26 | 28.97 |
| 45 | 1977 | 17.59 | 21.50 | 22.97 | 31.07 | 37.36 | 20.43 | 25.97 | 26.70 | 36.09 | ... | 12.80 | 16.30 | 18.87 | 25.83 | 37.10 | 12.66 | 17.22 | 17.12 | 21.49 | 30.31 |
| 46 | 1976 | 17.52 | 21.76 | 23.49 | 31.46 | 37.50 | 20.36 | 26.14 | 27.10 | 36.42 | ... | 12.75 | 16.40 | 19.11 | 26.10 | 34.92 | 13.79 | 17.29 | 17.40 | 20.29 | 20.93 |
| 47 | 1975 | 17.30 | 21.55 | 23.30 | 31.45 | 38.41 | 20.00 | 26.02 | 26.93 | 36.21 | ... | 12.48 | 15.88 | 18.11 | 27.93 | 33.03 | 12.87 | 16.04 | 17.04 | 18.85 | 23.63 |
| 48 | 1974 | 17.68 | 21.60 | 23.32 | 31.69 | 38.37 | 20.63 | 26.15 | 26.79 | 36.62 | ... | 12.31 | 15.55 | 18.19 | 27.80 | 33.86 | 13.24 | 16.29 | 18.17 | 21.70 | 26.74 |
| 49 | 1973 | 18.06 | 22.22 | 24.08 | 32.80 | 38.16 | 21.18 | 26.90 | 27.67 | 37.69 | ... | 12.02 | 15.79 | 18.11 | 28.35 | 41.81 | 12.92 | 16.46 | 20.22 | 22.23 | 25.10 |
50 rows × 61 columns
comparison_result = (sqldf("SELECT year, AVG(men_high_school) AS men_high_school,AVG(women_high_school) AS women_high_school,AVG(men_advanced_degree) AS men_advanced_degree, AVG(women_advanced_degree) AS women_advanced_degree FROM df GROUP BY year"))
print("Overview of Data Manipulation:")
plt.figure(figsize=(12, 8))
plt.plot(comparison_result['year'], comparison_result['men_high_school'], label='Men - High School')
plt.plot(comparison_result['year'], comparison_result['men_advanced_degree'], label='Men - Advanced Degree')
plt.plot(comparison_result['year'], comparison_result['women_high_school'], label='Women - High School')
plt.plot(comparison_result['year'], comparison_result['women_advanced_degree'], label='Women - Advanced Degree')
plt.title('Comparison of Average Wages for High School-educated Men and Women Over the Years')
plt.xlabel('Year')
plt.ylabel('Average Wages')
plt.legend()
plt.show()
Overview of Data Manipulation: